A Manhattan plot is a specific type of scatter plot widely used in genomics to study GWAS results (Genome Wide Association Study).

Each point represents a genetic variant. The X axis shows its position on a chromosome, the Y axis tells how much it is associated with a trait.

Basic Manhattan plot

library(qqman)

head(gwasResults)
##   SNP CHR BP         P
## 1 rs1   1  1 0.9148060
## 2 rs2   1  2 0.9370754
## 3 rs3   1  3 0.2861395
## 4 rs4   1  4 0.8304476
## 5 rs5   1  5 0.6417455
## 6 rs6   1  6 0.5190959

# Make the Manhattan plot on the gwasResults dataset
manhattan(gwasResults, chr="CHR", bp="BP", snp="SNP", p="P" )

Highlight a group of SNP

snpsOfInterest
##   [1] "rs3001" "rs3002" "rs3003" "rs3004" "rs3005" "rs3006" "rs3007" "rs3008" "rs3009" "rs3010" "rs3011" "rs3012" "rs3013"
##  [14] "rs3014" "rs3015" "rs3016" "rs3017" "rs3018" "rs3019" "rs3020" "rs3021" "rs3022" "rs3023" "rs3024" "rs3025" "rs3026"
##  [27] "rs3027" "rs3028" "rs3029" "rs3030" "rs3031" "rs3032" "rs3033" "rs3034" "rs3035" "rs3036" "rs3037" "rs3038" "rs3039"
##  [40] "rs3040" "rs3041" "rs3042" "rs3043" "rs3044" "rs3045" "rs3046" "rs3047" "rs3048" "rs3049" "rs3050" "rs3051" "rs3052"
##  [53] "rs3053" "rs3054" "rs3055" "rs3056" "rs3057" "rs3058" "rs3059" "rs3060" "rs3061" "rs3062" "rs3063" "rs3064" "rs3065"
##  [66] "rs3066" "rs3067" "rs3068" "rs3069" "rs3070" "rs3071" "rs3072" "rs3073" "rs3074" "rs3075" "rs3076" "rs3077" "rs3078"
##  [79] "rs3079" "rs3080" "rs3081" "rs3082" "rs3083" "rs3084" "rs3085" "rs3086" "rs3087" "rs3088" "rs3089" "rs3090" "rs3091"
##  [92] "rs3092" "rs3093" "rs3094" "rs3095" "rs3096" "rs3097" "rs3098" "rs3099" "rs3100"

manhattan(gwasResults, highlight = snpsOfInterest)

Annotate SNPs below specified p-value

manhattan(gwasResults, annotatePval = 0.01)

Highly customisable with ggplot2

library(ggplot2)
library(dplyr)

data <- gwasResults %>% 
  
  # Compute chromosome size
  group_by(CHR) %>% 
  summarise(chr_len=max(BP)) %>% 
  
  # Calculate cumulative position of each chromosome
  mutate(tot=cumsum(chr_len)-chr_len) %>%
  select(-chr_len) %>%
  
  # Add this info to the initial dataset
  left_join(gwasResults, ., by=c("CHR"="CHR")) %>%
  
  # Add a cumulative position of each SNP
  arrange(CHR, BP) %>%
  mutate( BPcum=BP+tot)

head(data)
##   SNP CHR BP         P tot BPcum
## 1 rs1   1  1 0.9148060   0     1
## 2 rs2   1  2 0.9370754   0     2
## 3 rs3   1  3 0.2861395   0     3
## 4 rs4   1  4 0.8304476   0     4
## 5 rs5   1  5 0.6417455   0     5
## 6 rs6   1  6 0.5190959   0     6
axisdf <- data %>%
  group_by(CHR) %>%
  summarize(center=(max(BPcum)+min(BPcum))/2)

head(axisdf)
## # A tibble: 6 × 2
##     CHR center
##   <int>  <dbl>
## 1     1   750.
## 2     2  2096 
## 3     3  3212.
## 4     4  4204 
## 5     5  5115 
## 6     6  5966

ggplot(data, aes(x=BPcum, y=-log10(P))) +
  geom_point( aes(color=as.factor(CHR)), alpha=0.8, size=1.3) +
  scale_color_manual(values = rep(c("#4197d8","#f8c120","#413496","#495226", "#d60b6f","#e66519",
                                    "#d581b7","#83d3ad", "#7c162c","#26755d"), 3)) +
  scale_x_continuous( label = axisdf$CHR, breaks= axisdf$center ) +
  scale_y_continuous(expand = c(0, 0) ) +     # remove space between plot area and x axis
  theme_bw() +
  theme(legend.position="none", panel.border = element_blank(),panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank()
  )

Highlight SNPs use ggrepel

library(ggrepel)

data <- gwasResults %>% 
  # Compute chromosome size
  group_by(CHR) %>% 
  summarise(chr_len=max(BP)) %>% 
  
  # Calculate cumulative position of each chromosome
  mutate(tot=cumsum(chr_len)-chr_len) %>%
  select(-chr_len) %>%
  
  # Add this info to the initial dataset
  left_join(gwasResults, ., by=c("CHR"="CHR")) %>%
  
  # Add a cumulative position of each SNP
  arrange(CHR, BP) %>%
  mutate( BPcum=BP+tot) %>%
  
  # Add highlight and annotation information
  mutate( is_highlight=ifelse(SNP %in% snpsOfInterest, "yes", "no")) %>%
  mutate( is_annotate=ifelse(-log10(P)>4, "yes", "no")) 

head(data)
##   SNP CHR BP         P tot BPcum is_highlight is_annotate
## 1 rs1   1  1 0.9148060   0     1           no          no
## 2 rs2   1  2 0.9370754   0     2           no          no
## 3 rs3   1  3 0.2861395   0     3           no          no
## 4 rs4   1  4 0.8304476   0     4           no          no
## 5 rs5   1  5 0.6417455   0     5           no          no
## 6 rs6   1  6 0.5190959   0     6           no          no
# Prepare X axis
axisdf <- data %>% group_by(CHR) %>%
  summarize(center=(max(BPcum)+min(BPcum))/2)
head(axisdf)
## # A tibble: 6 × 2
##     CHR center
##   <int>  <dbl>
## 1     1   750.
## 2     2  2096 
## 3     3  3212.
## 4     4  4204 
## 5     5  5115 
## 6     6  5966

ggplot(data, aes(x=BPcum, y=-log10(P))) +
  geom_point( aes(color=as.factor(CHR)), alpha=0.8, size=1.3) +
  scale_color_manual(values = rep(c("#4197d8","#f8c120","#413496","#495226", "#d60b6f","#e66519",
                                  "#d581b7","#83d3ad", "#7c162c","#26755d"), 3)) +
  scale_x_continuous( label = axisdf$CHR, breaks= axisdf$center ) +
  scale_y_continuous(expand = c(0, 0) ) +     # remove space between plot area and x axis
  geom_point(data=subset(data, is_highlight=="yes"), color="orange", size=2) +
  geom_label_repel( data=subset(data, is_annotate=="yes"), aes(label=SNP), size=2) + theme_bw() +
  theme( legend.position="none", panel.border = element_blank(), panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank())

Circular version of Manhattan plot

library("CMplot")
par(mar = c(0, 0, 0, 0))
CMplot(gwasResults,plot.type="c",r=3,outward=TRUE,file.output=FALSE,chr.labels=seq(1,22),verbose=FALSE)

Compare the p-values of several traits

data(pig60K)

head(pig60K)
##           SNP Chromosome Position    trait1     trait2     trait3
## 1 ALGA0000009          1    52297 0.7738187 0.51194318 0.51194318
## 2 ALGA0000014          1    79763 0.7738187 0.51194318 0.51194318
## 3 ALGA0000021          1   209568 0.7583016 0.98405289 0.98405289
## 4 ALGA0000022          1   292758 0.7200305 0.48887140 0.48887140
## 5 ALGA0000046          1   747831 0.9736840 0.22096836 0.22096836
## 6 ALGA0000047          1   761957 0.9174565 0.05753712 0.05753712

par(mar = c(0, 0, 0, 0))
CMplot(pig60K, plot.type="c", chr.labels=c(1:18,"X","Y"), r=0.1, outward=FALSE, cir.chr.h=1.3,
       file.output=FALSE, verbose = FALSE)